library(tidyverse)
library(plotly)
library(readxl)
library(lubridate)

knitr::opts_chunk$set(
  fig.width = 10,
  fig.asp = .2,
  out.width = "90%",
  dpi = 300,
  warning = FALSE,
  message = FALSE
)
#read crime_df data
crime_df = read_csv("data/crime_df.csv")
head(crime_df)
## # A tibble: 6 × 18
##    ...1 date       hour_of…¹ borough level offense offen…² place suspe…³ suspe…⁴
##   <dbl> <date>         <dbl> <chr>   <chr> <chr>   <chr>   <chr> <chr>   <chr>  
## 1     1 2017-12-22         0 MANHAT… FELO… GRAND … LARCEN… BAR/… UNKNOWN UNKNOWN
## 2     2 2017-12-14         9 QUEENS  VIOL… HARRAS… HARASS… COMM… 25-44   BLACK  
## 3     3 2017-02-01         8 STATEN… MISD… SEX CR… SEXUAL… RESI… UNKNOWN UNKNOWN
## 4     4 2017-12-29        15 QUEENS  MISD… CRIMIN… CRIMIN… CHAI… <NA>    <NA>   
## 5     5 2017-03-01         2 MANHAT… VIOL… HARRAS… HARASS… RESI… UNKNOWN WHITE  
## 6     6 2017-12-22        10 MANHAT… MISD… PETIT … LARCEN… STRE… UNKNOWN UNKNOWN
## # … with 8 more variables: suspect_sex <chr>, victim_age <chr>,
## #   victim_race <chr>, victim_sex <chr>, day_of_week <chr>, mon <dbl>,
## #   latitude <dbl>, longitude <dbl>, and abbreviated variable names
## #   ¹​hour_of_day, ²​offense_classification, ³​suspect_age, ⁴​suspect_race
#define crime_df_map for our map
crime_df_map = drop_na(crime_df, latitude)

Map draft

crime_plot =
  crime_df %>% 
  filter(!is.na(borough)) %>%
  plot_ly(
    lat = ~latitude, 
    lon = ~longitude, 
    frame = ~hour_of_day,
    type = "scattermapbox", 
    mode = "markers", 
    alpha = 0.2,
    color = ~borough) %>% 
  layout(
    mapbox = list(
      style = 'carto-positron',
      zoom = 9,
      center = list(lon = -73.9, lat = 40.7)))
crime_plot

Map based on zipcodes

library(sp) #for analyzing spatial data
library(sf) #for analyzing spatial data
library(rgdal) #for importing zips shapefile and transform CRS
library(leaflet) #for interactive maps
library(tigris) #geojoin
library(htmlwidgets) #save interactive maps

Get the zip codes

#import zips shapefile and transform CRS 
zips = readOGR("data/cb_2015_us_zcta510_500k/cb_2015_us_zcta510_500k.shp")
## OGR data source with driver: ESRI Shapefile 
## Source: "C:\Users\30535\OneDrive\Documents\Columbia courses\8105\crimeweather_final_project.github.io\data\cb_2015_us_zcta510_500k\cb_2015_us_zcta510_500k.shp", layer: "cb_2015_us_zcta510_500k"
## with 33144 features
## It has 5 fields
## Integer64 fields read as strings:  ALAND10 AWATER10
zips = spTransform(zips, CRS("+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0"))
#extract only lon and lat
crime_lat_long = select(crime_df_map, longitude, latitude)

#transform coordinates into a SpatialPointsDataFrame
crime_spdf = SpatialPointsDataFrame(coords = crime_lat_long, data = crime_lat_long, proj4string = CRS("+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0"))

#subset only the zipcodes in which points are found
crime_zips = zips[crime_spdf, ]

#redefine crime_df_map to include zipcodes
crime_df_map = cbind(crime_df_map, over(crime_spdf, crime_zips[,"ZCTA5CE10"]))
crime_df_map = rename(crime_df_map, zip = ZCTA5CE10)

300 crime incidents can’t be matched to a zip code, so we drop them.

crime_df_map =
  crime_df_map %>%
  drop_na(zip)

Modified zip code

In order to make meaningful data analysis, we want zip codes represent regions with similar population. However, some zip code regions are very small, among which there are even some buildings that have their own zip codes. To deal with the challenges of zip codes, we convert zip codes to modified zip codes to account for discrepancies in population size. Modified zip codes represent regions with similar population size. In order to make the conversion, we use a conversion table ZCTA-to-MODZCTA.csv obtained from here.

zcta_conv = read_csv("data/ZCTA-to-MODZCTA.csv")
zcta_conv$ZCTA = as.character(zcta_conv$ZCTA)
zcta_conv$MODZCTA = as.character(zcta_conv$MODZCTA)
# Convert zip to mod_zip
crime_df_map = crime_df_map %>%
  left_join(rename(zcta_conv, zip = ZCTA), by = "zip") %>%
  rename(mod_zip = MODZCTA)

0 crime incidents can’t be matched to a modified zip code and leave NA values in the mod_zip column. This is because the 0 crime incidents happened in zip code 10550, which is not in New York City. Therefore, we drop the 0 observations.

crime_df_map = drop_na(crime_df_map, mod_zip)

Number of crimes by modified zip codes

We can then obtain number of crimes in each zip region, sorting in descending order.

number_of_crimes = 
  crime_df_map %>% 
  group_by(mod_zip) %>% 
  summarize(number_of_crimes = n()) %>%
  arrange(desc(number_of_crimes))
number_of_crimes
## # A tibble: 177 × 2
##    mod_zip number_of_crimes
##    <chr>              <int>
##  1 11212               2125
##  2 11207               2114
##  3 11208               2044
##  4 10467               1911
##  5 10456               1899
##  6 10029               1806
##  7 10457               1757
##  8 11206               1673
##  9 10452               1650
## 10 10027               1631
## # … with 167 more rows

Crime rate by modified zip codes

In order to obtain crime rate, we get population of each zip code region from here.

pop_zip = read_csv("data/csvData.csv")
pop_zip$zip = as.character(pop_zip$zip)

#filter out regions in New York City
pop_zip = pop_zip %>%
  filter(county %in% c("New York", "Kings", "Queens", "Bronx", "Richmond"))

The dataset pop_zip lacks population data in zip code region 10065, so we manually add it in (data source here).

pop_zip[nrow(pop_zip) + 1,] = list("10065","    
New York City", "New York", 30339)

We then convert zip code to modified zip code, and get the population in each modified zip code region.

pop_mod_zip = pop_zip %>%
  left_join(rename(zcta_conv, zip = ZCTA), by = "zip") %>%
  rename(mod_zip = MODZCTA) %>%
  group_by(mod_zip) %>%
  summarize(population = sum(population))

Add population of each modified zip code region into number_of_crimes, and calculate crime rate in each modified zip code region. Crime rates are sorted in descending order.

crime_rate = left_join(number_of_crimes, pop_mod_zip, by = "mod_zip")
crime_rate$crime_rate = crime_rate$number_of_crimes / crime_rate$population * 100
crime_rate = 
  crime_rate %>%
  select(mod_zip, crime_rate) %>%
  group_by(crime_rate) %>%
  arrange(desc(crime_rate))
as.tibble(crime_rate)
## # A tibble: 177 × 2
##    mod_zip crime_rate
##    <chr>        <dbl>
##  1 10075        11.2 
##  2 10018         9.00
##  3 10001         6.23
##  4 10004         6.19
##  5 10006         5.61
##  6 10036         4.96
##  7 10035         4.03
##  8 10455         3.46
##  9 10474         3.45
## 10 10013         3.25
## # … with 167 more rows

Interactive map

Read the modzcta shapefile obtained from here.

modzcta = st_read("data/MODZCTA_2010/MODZCTA_2010.shp") %>%
  janitor::clean_names()
## Reading layer `MODZCTA_2010' from data source 
##   `C:\Users\30535\OneDrive\Documents\Columbia courses\8105\crimeweather_final_project.github.io\data\MODZCTA_2010\MODZCTA_2010.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 178 features and 2 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 913176 ymin: 120122 xmax: 1067382 ymax: 272844
## Projected CRS: Lambert_Conformal_Conic

Merge crime rate data with modzcta shapefile.

crime_rate_modzcta = geo_join(modzcta, crime_rate, "modzcta", "mod_zip", how = "inner")

Make interactive map of crime rate.

#label
labels = sprintf(
  "<strong>%s</strong><br/>Crime rate is %g&percnt;", crime_rate_modzcta$modzcta, crime_rate_modzcta$crime_rate) %>%
  lapply(htmltools::HTML)

#color palette
pal = colorBin(palette = "OrRd", 9, domain = crime_rate_modzcta$crime_rate)

map_interactive = crime_rate_modzcta %>%
  st_transform(crs = "+init=epsg:4326") %>%
  leaflet() %>%
  addProviderTiles(provider = "CartoDB.Positron") %>%
  addPolygons(label = labels,
              stroke = FALSE,
              smoothFactor = 0.5,
              opacity = 1,
              fillOpacity = 0.7,
              fillColor = ~ pal(crime_rate),
              highlightOptions = highlightOptions(weight = 5,
                                                  fillOpacity = 1,
                                                  color = "black",
                                                  opacity = 1,
                                                  bringToFront = TRUE)) %>%
  addLegend("bottomright",
            pal = pal,
            values = ~ crime_rate,
            title = "Crime cases per 100 residents",
            opacity = 0.7)
saveWidget(map_interactive, "nyc_crime_rate_map.html")
map_interactive

crime_month data preparation

crime_month = crime_df_map %>%
  group_by(mod_zip, date) %>%
  summarize(crime_counts = n())
crime_month$date = cut(crime_month$date, "30 days")
crime_month =  crime_month %>%
  group_by(mod_zip, date) %>%
  summarize(crime_counts = sum(crime_counts)) %>%
  rename(month_following = date)
crime_month = left_join(crime_month, pop_mod_zip, by = "mod_zip") %>%
  select(mod_zip, population, month_following, crime_counts)
crime_month$crime_rate = crime_month$crime_counts / crime_month$population
crime_month = crime_month %>%
  select(month_following, mod_zip, population, crime_counts, crime_rate) %>%
  arrange(month_following)
crime_month = geo_join(modzcta, crime_month, "modzcta", "mod_zip", how = "inner")

Shiny

library(shiny)
#define UI
ui = fluidPage(
  titlePanel("NYC crime visualization by modified zip code regions"),
  sidebarLayout(
    h5("blabla"),
    sidebarPanel(
      selectInput("date",
                  "Select a date (month starting from):",
                  choices = unique(crime_month$month_following))
    )
  ),
  mainPanel(
    tabsetPanel(
      tabPanel("Crime counts", leafletOutput("crime_counts")),
      tabPanel("Crime rate", leafletOutput("crime_rate"))
    )
  )
)

#define server logic
server = function(input, output){
  month_selected = reactive({
    w = crime_month %>%
      filter(month_following == input$date)
    return(w)
  })
  output$crime_counts = renderLeaflet({
    pal = colorBin(palette = "YlGn", 9, domain = crime_month$crime_counts)
    labels = sprintf(
      "<strong>%s</strong><br/>%g incidents in the month",
      month_selected()$mod_zip, month_selected()$crime_counts) %>%
      lapply(htmltools::HTML)
    month_selected() %>%
      st_transform(crs = "+init=epsg:4326") %>%
      leaflet() %>%
      addProviderTiles(provider = "CartoDB.Positron") %>%
      setView(-73.9, 40.7, zoom = 10) %>%
      addPolygons(label = labels,
                  stroke = FALSE,
                  smoothFactor = 0.5,
                  opacity = 1,
                  fillOpacity = 0.7,
                  fillColor = ~ pal(month_selected()$crime_counts),
                  highlightOptions = highlightOptions(weight = 5,
                                                      fillOpacity = 1,
                                                      color = "black",
                                                      opacity = 1,
                                                      bringToFront = TRUE)) %>%
      addLegend("bottomright",
                pal = pal,
                values = ~ crime_counts,
                title = "Crime incident counts",
                opacity = 0.7)
  })
  output$crime_rate = renderLeaflet({
    pal = colorBin(palette = "OrRd", 9, domain = crime_month$crime_rate)
    labels = sprintf(
      "<strong>%s</strong><br/>%g incidents in the month",
      month_selected()$mod_zip, month_selected()$crime_rate) %>%
      lapply(htmltools::HTML)
    month_selected() %>%
      st_transform(crs = "+init=epsg:4326") %>%
      leaflet() %>%
      addProviderTiles(provider = "CartoDB.Positron") %>%
      setView(-73.9, 40.7, zoom = 10) %>%
      addPolygons(label = labels,
                  stroke = FALSE,
                  smoothFactor = 0.5,
                  opacity = 1,
                  fillOpacity = 0.7,
                  fillColor = ~ pal(month_selected()$crime_rate),
                  highlightOptions = highlightOptions(weight = 5,
                                                      fillOpacity = 1,
                                                      color = "black",
                                                      opacity = 1,
                                                      bringToFront = TRUE)) %>%
      addLegend("bottomright",
                pal = pal,
                values = ~ crime_rate,
                title = "Crime incident rate",
                opacity = 0.7)
  })
}

Finally, run the shiny app!

Bug……

#shinyApp(ui = ui, server = server)